library(readxl)
library(dplyr)
library(tidyr)
library(psych)
# Set the working directory to the folder containing the Excel sheets
#setwd("C:/Users/esteb/Desktop/FinalCorpAdds")
### Classify Text Function.
# The definition is wrapped in `if (FALSE)`, so it is NOT created when the
# script runs; drop the guard (or run the body by hand) to re-enable labelling.
if (FALSE) {
  # Send `texts` to the OpenAI chat-completions endpoint as ONE running
  # conversation and return the assistant's reply to each prompt.
  #
  # Every element of `texts` is posted as a user message together with all
  # earlier user/assistant messages, so later prompts can build on earlier ones.
  # httr and jsonlite are called through `pkg::` because this file never
  # attaches them.
  #
  # Args:
  #   texts:   character vector or list of prompt strings, in sending order.
  #   api_key: API key placed in the "Bearer" Authorization header.
  #
  # Returns:
  #   Character vector with one reply per element of `texts`. Naming follows
  #   sapply(): names of `texts` if present, the texts themselves when `texts`
  #   is an unnamed character vector, otherwise no names.
  #
  # Errors:
  #   stop()s when the request is still rate limited (HTTP 429) after five
  #   attempts, when the API answers with any other HTTP error status, or when
  #   the parsed response holds no assistant message.
  classify_texts3 <- function(texts, api_key) {
    url <- "https://api.openai.com/v1/chat/completions"
    max_retries <- 5

    # Running transcript that is re-sent with every request.
    conversation_history <- list()
    results <- character(length(texts))

    for (i in seq_along(texts)) {
      # Add the next user turn to the conversation.
      conversation_history[[length(conversation_history) + 1L]] <-
        list(role = "user", content = texts[[i]])

      payload <- list(
        model = "gpt-4o",
        messages = conversation_history,
        max_tokens = 1000
      )
      jsonPayload <- jsonlite::toJSON(payload, auto_unbox = TRUE)

      response <- NULL
      retry_count <- 0
      success <- FALSE

      # Only HTTP 429 (rate limit) is retried, after a 10 second pause.
      while (!success && retry_count < max_retries) {
        response <- httr::POST(
          url,
          httr::add_headers(
            Authorization = paste("Bearer", api_key),
            "OpenAI-Project" = "proj_A0FjGjt29Rg92yrXBaVfxyPQ",
            "Content-Type" = "application/json"
          ),
          body = jsonPayload,
          encode = "json"
        )

        if (httr::status_code(response) == 429) {
          Sys.sleep(10)
          retry_count <- retry_count + 1
        } else {
          success <- TRUE
        }
      }

      if (!success) {
        stop("Failed to get a response from the API after multiple attempts due to rate limiting.")
      }

      # Any other failure (bad key, malformed request, server error) used to
      # slip through as "success" and put a NULL reply into the history, which
      # corrupted every later request. Fail loudly instead.
      if (httr::http_error(response)) {
        stop(
          sprintf(
            "OpenAI API request %d failed with HTTP status %d.",
            i, httr::status_code(response)
          ),
          call. = FALSE
        )
      }

      result <- httr::content(response, as = "parsed")
      choices <- result$choices
      assistant_response <- if (length(choices) > 0) {
        choices[[1]]$message$content
      } else {
        NULL
      }
      if (!is.character(assistant_response) || length(assistant_response) != 1L) {
        stop(
          sprintf("OpenAI API response %d contained no assistant message.", i),
          call. = FALSE
        )
      }

      # Keep the reply in the history so the next prompt sees it.
      conversation_history[[length(conversation_history) + 1L]] <-
        list(role = "assistant", content = assistant_response)
      results[i] <- assistant_response
    }

    # Reproduce the names sapply() would have attached.
    if (!is.null(names(texts))) {
      names(results) <- names(texts)
    } else if (is.character(texts)) {
      names(results) <- texts
    }

    results
  }
}

### PROMPTS ###
# Prompt text sent to the model. The labelling loops paste the cleaned episode
# transcript onto P1 to form the opening prompt and then append the prompts
# held in a prompt-holder list.
# The list items inside the multi-line prompts end with the "\n" escape AND a
# literal line break, so the text that is sent has a blank line between
# consecutive items.
### negative prompts ###
# Prompt 1 (also used by the positive pass): short summary of the transcript.
P1 <- "Please briefly review the following transcript from a police show and summarize its main points. Focus on key elements of the scenes, character actions, and any major themes. Limit your response to 4-5 sentences."
# Condensed Prompt 2: Identifying Negative Police Actions (five categories,
# each with a supporting example from the transcript)
prompt2_neg <- "Review the transcript and identify if any negative actions by police officers are depicted, including:\n
1. Police officers taking bribes, committing crimes, assaulting women, or engaging in corruption\n
2. Police using unjustified excessive force\n
3. Racial profiling by police\n
4. Police failing to solve the case due to their own actions or negligence\n
5. Demonstrations of sexism or homophobia by police\n
For each identified action, provide a brief example or specific line from the transcript. Clearly explain why the example fits the category. Limit each example to 2-3 sentences or 100 characters."

# Condensed Prompt 3: Binary Classification (1 = yes, 0 = no) for the same
# five negative categories
prompt3_neg <- "Based on your analysis, classify each negative action category with 1 (yes) or 0 (no):\n
1. Police officers taking bribes, committing crimes, assaulting women, or engaging in corruption\n
2. Police using unjustified excessive force\n
3. Racial profiling by police\n
4. Police failing to solve the case due to their own actions or negligence\n
5. Demonstrations of sexism or homophobia by police\n
Only use 1 or 0 in your response. Keep explanations concise, and limit examples to 2-3 sentences or 100 characters."

# Condensed Prompt 4: Final Classification for Negative Actions; asks for 1
# only when the action is present AND portrayed negatively
prompt4_neg <- "Provide the final classification for each category based on your analysis. Ensure that the identified actions are portrayed negatively, and not used as justifications in the pursuit of justice or to stop crime. Classify each action as follows:\n
1. Police officers taking bribes, committing crimes, assaulting women, or engaging in corruption\n
2. Police using unjustified excessive force\n
3. Racial profiling by police\n
4. Police failing to solve the case due to their own actions or negligence\n
5. Demonstrations of sexism or homophobia by police\n
Use 1 if the action is present and portrayed negatively, without being justified in pursuit of justice or stopping crime. Use 0 if the action is absent or portrayed positively. Include brief notes if necessary. Limit any examples to 2-3 sentences or 100 characters."
###
# Follow-up prompts (2-4) for the negative pass; P1 is not part of this list.
promptholderneg <- list(prompt2_neg, prompt3_neg, prompt4_neg)

### positive prompts ###
# Same three-step structure as the negative prompts, with six positive
# categories. As above, list items end with the "\n" escape plus a literal
# line break.
# Prompt 2: Identifying Positive Actions (with supporting examples)
prompt2_pos <- "Review the provided transcript and identify if any of the following positive actions are depicted involving police officers:\n
- Police officers portrayed as 'good guy' characters (e.g., showing integrity, fairness, or empathy)\n
- Wrongful actions by police (e.g., coercion, violence, corruption) portrayed as just, routine, necessary, or noble in the pursuit of justice\n
- Wrongful actions by police not normalized but portrayed as actions by a 'bad apple' officer\n
- Police solving the case or arresting the criminal(s)\n
- Police officers taking heroic action, such as putting themselves in danger for the sake of justice\n
- Police officers being harmed or injured in the line of duty\n\n
For each identified action, provide specific examples or lines from the transcript. Clearly explain why each example fits the category, ensuring you only consider actions performed by police officers, not criminals or other members of the criminal justice system."

# Prompt 3: Binary Classification (1 = yes, 0 = no) for the six positive
# categories
prompt3_pos <- "Now, based on your analysis, provide a simple 1 (yes) or 0 (no) classification for each of the following positive categories:\n
- Police officers portrayed as 'good guy' characters\n
- Wrongful actions by police portrayed as just, routine, necessary, or noble in the pursuit of justice\n
- Wrongful actions by police not normalized but portrayed as actions by a 'bad apple' officer\n
- Police solving the case or arresting the criminal(s)\n
- Police officers taking heroic action\n
- Police officers harmed or injured in the line of duty\n\n
Again, please only consider actions taken by police characters and not by criminals or other members of the criminal justice system. Only use 1 or 0 in the classification and nowhere else in the response."

# Prompt 4: Final Classification for Positive Actions; asks for 1 only when
# the action is present AND portrayed positively
prompt4_pos <- "Integrating your analysis, provide the final binary classification for each positive category. Use 1 if the action is present and portrayed positively, and 0 if it is not present or portrayed negatively. Ensure your final classifications accurately reflect both the occurrence of the actions and their portrayal in the show:\n
- Police officers portrayed as 'good guy' characters\n
- Wrongful actions by police portrayed as just, routine, necessary, or noble in the pursuit of justice\n
- Wrongful actions by police not normalized but portrayed as actions by a 'bad apple' officer\n
- Police solving the case or arresting the criminal(s)\n
- Police officers taking heroic action\n
- Police officers harmed or injured in the line of duty\n\n
Your response should only include 1s and 0s, with brief explanatory notes if needed. Ensure accuracy and consistency with previous analysis."
###
# Prompts for the positive pass.
# NOTE(review): unlike promptholderneg, this list starts with P1, and the
# positive labelling loop also prepends paste(P1, cleaned_text). The positive
# conversation therefore sends P1 twice (the second time without a transcript)
# and yields five replies instead of four. Confirm whether that is intended
# before changing it, since saved results may depend on the five-column layout.
promptholderpos <- list(P1,prompt2_pos,prompt3_pos,prompt4_pos)
# Create a file list (filelistnew2) of all Excel workbooks (.xlsx) in the
# working directory.
# While a workbook is open, Excel keeps a "~$<name>.xlsx" lock file next to it.
# Those files match the pattern but are not readable workbooks, so drop them
# rather than letting read_excel() fail on them later.
filelistnew2 <- list.files(pattern = "\\.xlsx$")
filelistnew2 <- filelistnew2[!startsWith(filelistnew2, "~$")]
# 
#additional_combined_df_last <- data.frame()
#### NEGATIVE LABELING ####
# Disabled with `if (FALSE)`. When enabled, every episode script in every
# workbook is sent through the negative prompt sequence and the replies are
# appended, one row per episode, to `additional_combined_df`.
if (FALSE) {
  # Labelling is done in batches, so keep whatever an earlier batch left in the
  # workspace and only start from an empty data frame when nothing exists yet.
  if (!exists("additional_combined_df")) {
    additional_combined_df <- data.frame()
  }

  for (file in filelistnew2) {

    # Load the current Excel sheet.
    # NOTE(review): the positive-labelling loop reads the same files with
    # col_names = FALSE; here the first row is consumed as a header. Confirm
    # which of the two matches the workbooks.
    df <- read_excel(file)

    # Assumes exactly two columns: episode number and script text.
    colnames(df) <- c("Episode", "Script")

    # Keep rows that have at least one non-missing cell.
    df_cleaned <- df[rowSums(!is.na(df)) > 0, ]

    # Show name = file name without its extension.
    show_name <- tools::file_path_sans_ext(file)

    # seq_len() skips the loop for an empty sheet; 1:nrow() would run it for
    # i = 1 and i = 0.
    for (i in seq_len(nrow(df_cleaned))) {

      # Pull the script out as a plain string rather than a 1x1 tibble.
      episode_text <- df_cleaned[["Script"]][i]

      # Strip everything except letters, digits and whitespace.
      cleaned_text <- gsub("[^a-zA-Z0-9\\s\n]", "", episode_text, perl = TRUE)

      # Opening prompt = P1 + transcript, followed by the negative prompts.
      prompt1 <- paste(P1, cleaned_text)
      current_prompts <- c(prompt1, promptholderneg)

      # One reply per prompt, produced as a single conversation.
      GPTOutNeg <- classify_texts3(current_prompts, api_key)

      # Episode columns + one column per reply + the show name.
      new_row <- cbind(df_cleaned[i, ], as.data.frame(t(GPTOutNeg)), Show = show_name)

      # Appended row by row on purpose: if an API call fails part-way through,
      # the rows labelled so far are still in the workspace.
      additional_combined_df <- rbind(additional_combined_df, new_row)
    }

    # Checkpoint the cumulative results after each workbook.
    save(additional_combined_df, file = paste0("additional_classified_last", show_name, ".Rda"))
  }
}
### Positive Classification. Separate because of rate limit issues###
###
# Disabled with `if (FALSE)`. When enabled, every episode script in every
# workbook in filelistnew2 is sent through the positive prompt sequence and the
# replies are appended, one row per episode, to `positive_combined_df`.
if (FALSE) {
  # Labelling is done in batches, so keep whatever an earlier batch left in the
  # workspace and only start from an empty data frame when nothing exists yet.
  if (!exists("positive_combined_df")) {
    positive_combined_df <- data.frame()
  }

  for (file in filelistnew2) {

    # Load the current Excel sheet; the first row is treated as data.
    df <- read_excel(file, col_names = FALSE)

    # Assumes exactly two columns: episode number and script text.
    colnames(df) <- c("Episode", "Script")

    # Keep rows that have at least one non-missing cell.
    df_cleaned <- df[rowSums(!is.na(df)) > 0, ]

    # Show name = file name without its extension.
    show_name <- tools::file_path_sans_ext(file)

    # seq_len() skips the loop for an empty sheet; 1:nrow() would run it for
    # i = 1 and i = 0.
    for (i in seq_len(nrow(df_cleaned))) {

      # Pull the script out as a plain string rather than a 1x1 tibble.
      episode_text <- df_cleaned[["Script"]][i]

      # Strip everything except letters, digits and whitespace.
      cleaned_text <- gsub("[^a-zA-Z0-9\\s\n]", "", episode_text, perl = TRUE)

      # Opening prompt = P1 + transcript, followed by the positive prompts.
      # NOTE(review): promptholderpos itself begins with P1, so P1 is sent a
      # second time without a transcript; confirm this is intended.
      prompt1 <- paste(P1, cleaned_text)
      current_prompts <- c(prompt1, promptholderpos)

      # One reply per prompt, produced as a single conversation.
      GPTOutPos <- classify_texts3(current_prompts, api_key)

      # Episode columns + one column per reply + the show name.
      new_row <- cbind(df_cleaned[i, ], as.data.frame(t(GPTOutPos)), Show = show_name)

      # Appended row by row on purpose: if an API call fails part-way through,
      # the rows labelled so far are still in the workspace.
      positive_combined_df <- rbind(positive_combined_df, new_row)
    }

    # Checkpoint the cumulative results after each workbook.
    save(positive_combined_df, file = paste0("positive_classified", show_name, ".Rda"))
  }
}

## Done in batches so combine with previous labeled DFs
# Bind rows of new_combined_df and additional_combined_df (negative)
#negativelabs <- rbind(new_combined_df, additional_combined_df)
# Bind rows of combined_df and positive_combined_df (positive)
#positivelabs <- rbind(combined_df, positive_combined_df)
#### match with years for TS analysis. M to 1 merge
#matched_data <- inner_join(positivelabs, Listofshows, by = c("Show_Title" = "Show2", "Season_Number" = "Season"))
###
# Filter shows from Listofshows with Count > 2009
#listofshows_titles <- unique(Listofshows$Show2[which(Listofshows$Count > 2009)])
#negativelabs_titles <- unique(negativelabs$Show_Title)

# Perform an inner join to match negativelabs and Listofshows based on Show_Title and Season_Number
#matched_dataneg <- inner_join(negativelabs, Listofshows, by = c("Show_Title" = "Show2", "Season_Number" = "Season"))
#matched_datapos <- matched_data[which(matched_data$Count>2009),]
####
### Positive Factor Scores 
#matched_datapos$FinalPrompt <- sub("^1\\.", "", matched_datapos$Prompt4)
#matched_datapos$binary_classifications2 <- str_extract_all(matched_datapos$FinalPrompt, "\\b[01]\\b")

#classification_df_pos <- do.call(rbind, lapply(matched_datapos$binary_classifications2, function(x) {
#  if (length(x) < 6) c(x, rep(NA, 6 - length(x))) else x[1:6]
#}))
#classification_df_pos <- as.data.frame(classification_df_pos, stringsAsFactors = FALSE)
#classification_df_pos[] <- lapply(classification_df_pos, as.numeric)
#colnames(classification_df_pos) <- c("Class_1", "Class_2", "Class_3", "Class_4", "Class_5", "Class_6")
#matched_datapos <- cbind(matched_datapos, classification_df_pos)

### Negative Factor Scores
#matched_dataneg$FinalPrompt <- sub("^1\\.", "", matched_dataneg$Prompt4)
#matched_dataneg$binary_classifications2 <- str_extract_all(matched_dataneg$FinalPrompt, "\\b[01]\\b")

#classification_df_neg <- do.call(rbind, lapply(matched_dataneg$binary_classifications2, function(x) {
#  if (length(x) < 6) c(x, rep(NA, 6 - length(x))) else x[1:6]
#}))
#classification_df_neg <- as.data.frame(classification_df_neg, stringsAsFactors = FALSE)
#classification_df_neg[] <- lapply(classification_df_neg, as.numeric)
#colnames(classification_df_neg) <- c("Class_1", "Class_2", "Class_3", "Class_4", "Class_5", "Class_6")
#matched_dataneg <- cbind(matched_dataneg, classification_df_neg)

###
###
### The positive classifications give strange results because of their low
# variance, so let's try a different approach.
#matched_datapos <- matched_datapos %>%
#  rename(
#    good_guys = Class_1,
#    justified = Class_2,
#    bad_apples = Class_3,
#    solve_case = Class_4,
#    heroism = Class_5,
#    harmed_in_line_of_duty = Class_6
#  )

# Rename columns in matched_dataneg (Negative Traits)
#matched_dataneg <- matched_dataneg %>%
#  rename(
#    corruption = Class_1,
#    unjustified_excessive_force = Class_2,
#    racial_profiling = Class_3,
#    failure_to_solve_case = Class_4,
#    sexism = Class_5
#  )

# Create Traits by year and episode (binary presence)
#positive_traits_long <- matched_datapos %>%
#  select(Show_Title, Count, good_guys, solve_case, heroism,title) %>%
#  filter(Count >= 2010) %>%
#  pivot_longer(cols = good_guys:heroism, names_to = "Trait", values_to = "Value") %>%
#  mutate(Valence = "Positive")

# Step 2.2: Convert matched_dataneg to Long Format and Filter Data
#negative_traits_long <- matched_dataneg %>%
#  select(Show_Title, Count, corruption, racial_profiling, failure_to_solve_case,title) %>%
#  filter(Count >= 2010) %>%
#  pivot_longer(cols = corruption:failure_to_solve_case, names_to = "Trait", values_to = "Value") %>%
#  mutate(Valence = "Negative")
# stack trait presence variables to take average
#traits_combined <- bind_rows(positive_traits_long, negative_traits_long)
# rename traits for presentation
#trait_renames <- c(
#  "corruption" = "Corruption and Bribery",
#  "failure_to_solve_case" = "Failure to Solve Case",
#  "good_guys" = "Portrayal as Good Guys",
#  "heroism" = "Heroic Actions",
#  "racial_profiling" = "Racial Profiling",
#  "solve_case" = "Successful Case Resolution"
#)
# 
# Apply the renaming to the traits in the dataframe
#traits_combined$Trait <- recode(traits_combined$Trait, !!!trait_renames)
#
# Lookup from a season's starting year ("2010" ... "2022") to its
# "start-end" label ("2010-2011" ... "2022-2023").
year_labels <- local({
  season_starts <- 2010:2022
  setNames(
    paste(season_starts, season_starts + 1, sep = "-"),
    season_starts
  )
})

# Apply the new year range labels to the 'Count' column in traits_combined
#traits_combined$Count <- recode(traits_combined$Count, !!!year_labels)
# New plot: average trait value per season, one line per trait.
# ggplot2 has to be attached before the first ggplot() call; the script's
# other library(ggplot2) calls only come later.
library(ggplot2)

# Load the prepared analysis objects from disk. The code below needs
# `traits_combined`, so stop with a clear message if it is still missing.
load("DataForGPTFigures.RData")
if (!exists("traits_combined")) {
  stop("`traits_combined` is not available after loading DataForGPTFigures.RData.",
       call. = FALSE)
}

# Filter for relevant traits
selected_traits <- c("Corruption and Bribery",
                     "Failure to Solve Case",
                     "Heroic Actions",
                     "Portrayal as Good Guys",
                     "Racial Profiling",
                     "Successful Case Resolution")

filtered_data <- traits_combined %>%
  filter(Trait %in% selected_traits)

# Plot: stat_summary() draws the mean of Value at each Count; colour encodes
# valence and line type encodes the trait.
ggplot(filtered_data, aes(x = Count, y = Value, color = Valence, linetype = Trait, group = interaction(Trait, Valence))) +
  stat_summary(fun = mean, geom = "line", size = 1) +
  labs(title = "Positive and Negative Traits Over Time by Valence",
       x = "Year",
       y = "Average Value of Trait",
       color = "Valence",
       linetype = "Trait") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
###
library(RColorBrewer)

# How many distinct trait labels each valence group contains
num_negative <- with(traits_combined, length(unique(Trait[Valence == "Negative"])))
num_positive <- with(traits_combined, length(unique(Trait[Valence == "Positive"])))

# Sequential palettes with one shade per trait (brewer.pal needs at least 3):
# blues for the positive traits, reds for the negative ones
pos_colors <- brewer.pal(n = max(3, num_positive), name = "Blues")
neg_colors <- brewer.pal(n = max(3, num_negative), name = "Reds")

# Ensure key traits are more distinct by selecting stronger shades manually
library(ggplot2)
library(ggnewscale)
library(dplyr)

# Hand-picked colour for each trait label: blues for positive traits, reds for
# negative traits. Both vectors are keyed by the trait's display name.
pos_colors <- c(
  "Heroic Actions" = "#00008B",  # Dark Blue
  "Successful Case Resolution" = "#1E90FF",  # Dodger Blue
  "Justified Wrongful Actions" = "#4682B4",  # Steel Blue
  "Harmed in Line of Duty" = "#5F9EA0",  # Cadet Blue
  "Portrayal as Good Guys" = "#87CEEB"  # Sky Blue
)

# An earlier draft of this palette used to be assigned here and was overwritten
# straight away by the definition below; only the effective one is kept.
# NOTE(review): "Portrayal as Good Guys" is also a key of pos_colors and is
# selected among the positive traits elsewhere in this script; its entry here
# looks like a copy/paste leftover. Confirm before removing it.
neg_colors <- c(
  "Corruption and Bribery" = "#8B0000",  # Dark Red
  "Excessive Force" = "#B22222",  # Firebrick
  "Portrayal as Good Guys" = "#FF4500",  # Orange Red
  "Failure to Solve Case" = "#CD5C5C",  # Indian Red
  "Racial Profiling" = "#DC143C",  # Crimson
  "Sexist/Homophobic Behavior" = "#A52A2A"  # Brown
)

###
# Assign colors to traits based on their valence
# Trait labels that occur in the data, split by valence.
pos_traits <- unique(traits_combined$Trait[traits_combined$Valence == "Positive"])
neg_traits <- unique(traits_combined$Trait[traits_combined$Valence == "Negative"])

# Look every trait's colour up BY NAME in the matching palette. Pairing the
# concatenated palettes with the trait labels by position only works when the
# palettes and the data list the same traits in the same order; otherwise
# traits get another trait's colour (negative traits can even end up blue) and
# surplus colours are left with NA names.
trait_colors <- local({
  # as.character() so that factor labels index by name, not by integer code.
  pos_keys <- as.character(pos_traits)
  neg_keys <- as.character(neg_traits)
  colors <- c(pos_colors[pos_keys], neg_colors[neg_keys])
  names(colors) <- c(pos_keys, neg_keys)

  unmatched <- names(colors)[is.na(colors)]
  if (length(unmatched) > 0) {
    warning(
      "No colour defined for trait(s): ", paste(unmatched, collapse = ", "),
      call. = FALSE
    )
  }
  colors
})
###

###

###

library(ggplot2)
library(dplyr)
library(ggnewscale)  # Allows us to reset the legend

# Numeric x position for each season: the four-digit start year of a
# "YYYY-YYYY" label plus 0.5. Values that do not match that pattern are handed
# to as.numeric() unchanged.
# This has to run BEFORE year_breaks is built, because year_breaks selects the
# Year_Mid column.
traits_combined$Year_Mid <- as.numeric(sub("^(\\d{4})-\\d{4}$", "\\1", traits_combined$Count)) + 0.5

# One row per season label with its numeric position, in chronological order.
year_breaks <- traits_combined %>%
  distinct(Count, Year_Mid) %>%
  arrange(Year_Mid)
###

###
### Show chart: per-show proxies for negative and positive valence.
# Negative proxy: share of a show's episodes in which at least one of the five
# negative traits was coded as present.
negative_proxy <- matched_dataneg %>%
  mutate(
    .negative_total = corruption + unjustified_excessive_force +
      racial_profiling + failure_to_solve_case + sexism
  ) %>%
  group_by(Show_Title) %>%
  summarise(Proportion_One_Negative = mean(.negative_total > 0, na.rm = TRUE))

# Positive proxy: share of a show's episodes in which at least four of the
# five listed positive traits were coded as present.
positive_proxy <- matched_datapos %>%
  mutate(
    .positive_total = good_guys + justified + solve_case + heroism +
      harmed_in_line_of_duty
  ) %>%
  group_by(Show_Title) %>%
  summarise(Proportion_At_Least_Four_Positive = mean(.positive_total >= 4, na.rm = TRUE))

# Keep only the shows that have both proxies.
show_proxy_summary <- positive_proxy %>%
  inner_join(negative_proxy, by = "Show_Title")
# Display titles for the figures, keyed by the raw show key used in the data.
show_renames <- c(
  `blue bloods`     = "Blue Bloods",
  bones             = "Bones",
  castle            = "Castle",
  `chicago pd`      = "Chicago P.D.",
  `criminal minds`  = "Criminal Minds",
  csi               = "CSI",
  `csi miami`       = "CSI: Miami",
  `csi: cyber`      = "CSI: Cyber",
  `csi: ny`         = "CSI: NY",
  `csi: vegas`      = "CSI: Vegas",
  elementary        = "Elementary",
  fbi               = "FBI",
  `fbi most wanted` = "FBI: Most Wanted",
  `hawaii five-0`   = "Hawaii Five-0",
  lonestar          = "Lone Star",
  mentalist         = "The Mentalist",
  ncis              = "NCIS",
  `ncis: la`        = "NCIS: LA",
  `ncis: no`        = "NCIS: NO",
  `nine one one`    = "9-1-1",
  `organized crime` = "Organized Crime",
  `shades of blue`  = "Shades of Blue",
  svu               = "SVU",
  swat              = "SWAT",
  `the rookie`      = "The Rookie"
)

# Swap the raw show keys in Show_Title for their display titles
show_proxy_summary[["Show_Title"]] <- recode(show_proxy_summary[["Show_Title"]], !!!show_renames)

# Dumbbell chart: for every show, a grey segment joins the positive proxy
# (blue point) and the negative proxy (red point).
ggplot(data = show_proxy_summary, mapping = aes(x = Show_Title)) +
  geom_segment(
    mapping = aes(
      xend = Show_Title,
      y = Proportion_At_Least_Four_Positive,
      yend = Proportion_One_Negative
    ),
    size = 1,
    color = "gray"
  ) +
  geom_point(mapping = aes(y = Proportion_At_Least_Four_Positive), size = 3, color = "blue") +
  geom_point(mapping = aes(y = Proportion_One_Negative), size = 3, color = "red") +
  theme_minimal() +
  theme(
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10)
  ) +
  labs(x = "Show Title", y = "Proportion of Episodes")
### Final plot: mean positive vs. mean negative traits per episode, by show.
# Shows are ordered by decreasing mean positive traits; a grey segment joins
# each show's positive (blue) and negative (red) mean.
ggplot(
  data = mean_based_df,
  mapping = aes(x = reorder(Show_Title, -Mean_Positive_Traits))
) +
  geom_segment(
    mapping = aes(
      xend = Show_Title,
      y = Mean_Positive_Traits,
      yend = Mean_Negative_Traits
    ),
    size = 1,
    color = "gray"
  ) +
  geom_point(mapping = aes(y = Mean_Positive_Traits), size = 3, color = "blue") +
  geom_point(mapping = aes(y = Mean_Negative_Traits), size = 3, color = "red") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10)) +
  labs(x = "Program Title", y = "Average Narrative Traits per Episode")
###
# Print the show titles contained in the plotted data
print(mean_based_df[["Show_Title"]])
#



